# -*- coding: utf-8 -*-
import sys
if sys.version_info[0] == 2:
    # Python 2 only: force the process-wide default string encoding to UTF-8.
    # ``reload(sys)`` is needed because ``site`` removes
    # ``sys.setdefaultencoding`` at interpreter start-up. The ``imp`` import
    # is kept inside this branch because the module no longer exists on
    # Python 3.12+, where an unconditional import would break this file.
    from imp import reload
    reload(sys)
    sys.setdefaultencoding("utf-8")
import scrapy
from huxiu.items import HuxiuItem
import datetime,time

class HuxiuSpiderSpider(scrapy.Spider):
    """Spider that starts at http://www.huxiu.com and snapshots each page.

    Every response handled by :meth:`parse` is turned into one ``HuxiuItem``
    carrying the crawl timestamp, the page URL, the ``<title>`` text and the
    serialized child nodes of ``<body>``.
    """

    name            = "huxiuspider"
    allowed_domains = ["huxiu.com"]
    start_urls      = (
        'http://www.huxiu.com',
    )

    def parse(self, response):
        """Build a single ``HuxiuItem`` from ``response``.

        Args:
            response: The response object handed over by Scrapy; only its
                ``url`` attribute and ``xpath()`` method are used.

        Returns:
            A one-element list containing the populated item.
        """
        item = HuxiuItem()
        # No time tuple is passed, so this is the local time of the crawl.
        item['time'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['link'] = response.url

        # A page may have no <title> text node; fall back to an empty string
        # instead of raising IndexError on the empty result list.
        titles = response.xpath('//title/text()').extract()
        item['title'] = titles[0] if titles else ''

        # Concatenate every serialized child node of <body> into one string.
        item['content'] = ''.join(response.xpath('//body/node()').extract())

        return [item]
